In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn import datasets, linear_model
%matplotlib inline
def set_data(p, x):
temp = x.flatten()
n = len(temp[p:])
x_T = temp[p:].reshape((n, 1))
X_p = np.ones((n, p + 1))
for i in range(1, p + 1):
X_p[:, i] = temp[i - 1: i - 1 + n]
return X_p, x_T
def AR(coeff, init, T):
offset = coeff[0]
mult_coef = np.flip(coeff, 0)[:-1]
series = np.zeros(T)
for k, x_i in enumerate(init):
series[k] = x_i
for i in range(k + 1, T):
series[i] = np.sum(mult_coef * series[i - k - 1:i]) + np.random.normal() + offset
return series
def estimated_autocorrelation(x):
n = len(x)
mu, sigma2 = np.mean(x), np.var(x)
r = np.correlate(x - mu, x - mu, mode = 'full')[-n:]
result = r/(sigma2 * (np.arange(n, 0, -1)))
return result
def test_AR(x, coef, N):
x = x.flatten()
offset = coef[0]
slope = coef[1]
ave_err = np.empty((len(x) - N, N))
x_temp = np.empty(N)
for i in range(len(x) - N):
x_temp[0] = x[i] * slope + offset
for j in range(N -1):
x_temp[j + 1] = x_temp[j] * slope + offset
ave_err[i, :] = (x_temp - x[i:i+N])**2
return ave_err
In [2]:
x = sio.loadmat('Tut2_file1.mat')['x'].flatten()
plt.plot(x * 2, ',')
plt.xlabel('time')
plt.ylabel('x')
Out[2]:
In [27]:
X_p, x_T = set_data(1, x)
model = linear_model.LinearRegression()
model.fit(X_p, x_T)
model.coef_
Out[27]:
We can see that simulating the data as an AR(1) model is not effective in giving us anything similar the aquired data. This is due to the fact that we made the wrong assumptions when we computed the coefficients of our data. Our data is in fact clearly not a stationary process and in particular cannot be from an AR(1) model alone, as there is a linear trend in time. The meaning of the slope that we computed shows that successive data points are strongly correlated.
In [28]:
x_1 = AR(np.append(model.coef_, 0), [0, x[0]], 50001)
plt.plot(x_1[1:], ',')
plt.xlabel('time')
plt.ylabel('x')
Out[28]:
In [29]:
rgr = linear_model.LinearRegression()
x = x.reshape((len(x)), 1)
t = np.arange(len(x)).reshape(x.shape)
rgr.fit(t, x)
x_star= x - rgr.predict(t)
plt.plot(x_star.flatten(), ',')
plt.xlabel('time')
plt.ylabel('x')
Out[29]:
This time we obtain different coefficients, that we can use to simulate the data and see if they give us a similar result the real data.
In [30]:
X_p, x_T = set_data(1, x_star)
model.fit(X_p, x_T)
model.coef_
Out[30]:
In [31]:
x_1 = AR(np.append(model.coef_[0], 0), [0, x_star[0]], 50000)
plt.plot(x_1, ',')
plt.xlabel('time')
plt.ylabel('x')
Out[31]:
In [32]:
plt.plot(x_star[1:], x_star[:-1], ',')
plt.xlabel(r'x$_{t - 1}$')
plt.ylabel(r'x$_{t}$')
Out[32]:
In the next plot we can see that our predicted values have an error that decays exponentially the further we try to make a prediction. By the time it arrives to 5 time steps of distance it equal to the variance.
In [33]:
err = test_AR(x_star, model.coef_[0], 10)
np.sum(err, axis=0) / err.shape[0]
plt.plot(np.sum(err, axis=0) / err.shape[0], 'o', label='Error')
plt.plot([0, 10.], np.ones(2)* np.var(x_star), 'r', label='Variance')
plt.grid(linestyle='dotted')
plt.xlabel(r'$\Delta t$')
plt.ylabel('Error')
Out[33]:
In [34]:
x = sio.loadmat('Tut2_file2.mat')['x'].flatten()
plt.plot(x, ',')
plt.xlabel('time')
plt.ylabel('x')
np.mean(x)
Out[34]:
In [35]:
X_p, x_T = set_data(1, x)
model = linear_model.LinearRegression()
model.fit(X_p, x_T)
model.coef_
Out[35]:
We tried to simulate the data with these coefficients but it is clearly uneffective
In [36]:
x_1 = AR(model.coef_[0], x[:1], 50001)
plt.plot(x_1[1:], ',')
plt.xlabel('time')
plt.ylabel('x')
Out[36]:
By plotting the return plot we can better understand what is going on. The data can be divided in two parts. We can see that successive data is always around one of this two poles. If it were a real AR model we would expect something like the return plots shown below this one.
In [37]:
plt.plot(x[1:], x[:-1], ',')
plt.xlabel(r'x$_{t - 1}$')
plt.ylabel(r'x$_{t}$')
Out[37]:
In [38]:
plt.plot(x_star[1:], x_star[:-1], ',')
plt.xlabel(r'x$_{t - 1}$')
plt.ylabel(r'x$_{t}$')
Out[38]:
We can see that in the autocorelation plot the trend is exponential, which is what we would expect, but it is taking too long to decay for being a an AR model with small value of $p$
In [39]:
plt.plot(estimated_autocorrelation(x)[:200])
plt.xlabel(r'$\Delta$t')
plt.ylabel(r'$\rho$')
Out[39]:
In [40]:
plt.plot(estimated_autocorrelation(x_1.flatten())[:20])
plt.xlabel(r'$\Delta$t')
plt.ylabel(r'$\rho$')
Out[40]:
In [41]:
data = sio.loadmat('Tut2_file3.mat')
x_AR = data['x_AR'].flatten()
x_MA = data['x_MA'].flatten()
For computing the $\hat p$ for the AR model we predicted the parameters $a_i$ for various AR(5). We find that for p = 6 we do not have any correlation between previous values and future values.
In [42]:
for i in range(3,6):
X_p, x_T = set_data(i, x_AR)
model = linear_model.LinearRegression()
model.fit(X_p, x_T)
plt.plot(estimated_autocorrelation((x_T - model.predict(X_p)).flatten())[:20], \
label='AR(' + str(i) + ')')
plt.xlabel(r'$\Delta$t')
plt.ylabel(r'$\rho$')
plt.legend()
Out[42]:
For the MA $\hat q$ could be around 4-6
In [43]:
plt.plot(estimated_autocorrelation(x_MA)[:20])
plt.xlabel(r'$\Delta$t')
plt.ylabel(r'$\rho$')
Out[43]: